Benchmarking RDF against MDAnalysis¶
The algorithms in freud are highly efficient and rely on parallelized C++ code. Below, we show a benchmark of freud.density.RDF
against MDAnalysis.analysis.rdf
. This benchmark was run on an Intel(R) Core(TM) i3-8100B CPU @ 3.60GHz.
[1]:
import freud
import gsd
import MDAnalysis
import MDAnalysis.analysis.rdf
import multiprocessing as mp
import numpy as np
import matplotlib.pyplot as plt
import timeit
from tqdm import tqdm
[2]:
trajectory_filename = 'data/rdf_benchmark.gsd'
r_max = 5
r_min = 0.1
nbins = 75
[3]:
trajectory = MDAnalysis.coordinates.GSD.GSDReader(trajectory_filename)
topology = MDAnalysis.core.topology.Topology(n_atoms=trajectory[0].n_atoms)
u = MDAnalysis.as_Universe(topology, trajectory_filename)
rdf = MDAnalysis.analysis.rdf.InterRDF(g1=u.atoms, g2=u.atoms,
nbins=nbins,
range=(r_min, r_max)).run()
[4]:
plt.plot(rdf.bins, rdf.rdf)
plt.show()
[5]:
freud_rdf = freud.density.RDF(bins=nbins, r_max=r_max, r_min=r_min)
for frame in trajectory:
freud_rdf.compute(system=frame, reset=False)
freud_rdf
[5]:
Timing Functions¶
[6]:
def time_statement(stmt, repeat=3, number=1, **kwargs):
timer = timeit.Timer(stmt=stmt, globals=kwargs)
times = timer.repeat(repeat, number)
return np.mean(times), np.std(times)
[7]:
def time_mdanalysis_rdf(trajectory_filename, r_max, r_min, nbins):
trajectory = MDAnalysis.coordinates.GSD.GSDReader(trajectory_filename)
frame = trajectory[0]
topology = MDAnalysis.core.topology.Topology(n_atoms=frame.n_atoms)
u = MDAnalysis.as_Universe(topology, trajectory_filename)
code = """rdf = MDAnalysis.analysis.rdf.InterRDF(g1=u.atoms, g2=u.atoms, nbins=nbins, range=(r_min, r_max)).run()"""
return time_statement(code, MDAnalysis=MDAnalysis, u=u, r_max=r_max, r_min=r_min, nbins=nbins)
[8]:
def time_freud_rdf(trajectory_filename, r_max, r_min, nbins):
trajectory = MDAnalysis.coordinates.GSD.GSDReader(trajectory_filename)
code = """
rdf = freud.density.RDF(bins=nbins, r_max=r_max, r_min=r_min)
for frame in trajectory:
rdf.compute(system=frame, reset=False)"""
return time_statement(code, freud=freud, trajectory=trajectory, r_max=r_max, r_min=r_min, nbins=nbins)
[9]:
# Test timing functions
params = dict(
trajectory_filename=trajectory_filename,
r_max=r_max,
r_min=r_min,
nbins=nbins)
def system_size(trajectory_filename, **kwargs):
with gsd.hoomd.open(params['trajectory_filename'], 'rb') as trajectory:
return {'frames': len(trajectory),
'particles': len(trajectory[0].particles.position)}
print(system_size(**params))
mdanalysis_rdf_runtime = time_mdanalysis_rdf(**params)
print('MDAnalysis:', mdanalysis_rdf_runtime)
freud_rdf_runtime = time_freud_rdf(**params)
print('freud:', freud_rdf_runtime)
{'frames': 5, 'particles': 15625}
MDAnalysis: (18.00504128, 0.054983033593944214)
freud: (2.8556541516666747, 0.05481114115556424)
Perform Measurements¶
[10]:
def measure_runtime_scaling_r_max(r_maxes, **params):
result_times = []
for r_max in tqdm(r_maxes):
params.update(dict(r_max=r_max))
freud.parallel.set_num_threads(1)
freud_single = time_freud_rdf(**params)
freud.parallel.set_num_threads(0)
result_times.append((time_mdanalysis_rdf(**params), freud_single, time_freud_rdf(**params)))
return np.asarray(result_times)
[11]:
def plot_result_times(result_times, r_maxes, frames, particles):
plt.figure(figsize=(6, 4), dpi=200)
plt.errorbar(r_maxes, result_times[:, 0, 0], result_times[:, 0, 1],
label="MDAnalysis v{} analysis.rdf.InterRDF".format(MDAnalysis.__version__))
plt.errorbar(r_maxes, result_times[:, 1, 0], result_times[:, 1, 1],
label="freud v{} density.RDF, 1 thread".format(freud.__version__))
plt.errorbar(r_maxes, result_times[:, 2, 0], result_times[:, 2, 1],
label="freud v{} density.RDF, {} threads".format(freud.__version__, mp.cpu_count()))
plt.title(r'RDF for {} frames, {} particles'.format(frames, particles))
plt.xlabel(r'RDF $r_{{max}}$')
plt.ylabel(r'Average Runtime (s)')
plt.yscale('log')
plt.legend()
plt.show()
[12]:
r_maxes = [0.2, 0.3, 0.5, 1, 2, 3]
[13]:
result_times = measure_runtime_scaling_r_max(r_maxes, **params)
plot_result_times(result_times, r_maxes, **system_size(params['trajectory_filename']))
100%|██████████| 6/6 [00:31<00:00, 5.28s/it]
[14]:
print('Speedup, parallel freud / serial freud: {:.3f}x'.format(np.average(result_times[:, 1, 0] / result_times[:, 2, 0])))
print('Speedup, parallel freud / MDAnalysis: {:.3f}x'.format(np.average(result_times[:, 0, 0] / result_times[:, 2, 0])))
print('Speedup, serial freud / MDAnalysis: {:.3f}x'.format(np.average(result_times[:, 0, 0] / result_times[:, 1, 0])))
Speedup, parallel freud / serial freud: 2.900x
Speedup, parallel freud / MDAnalysis: 7.182x
Speedup, serial freud / MDAnalysis: 2.619x